# Load the bill-level data set (one row per bill) from the project repository.
df <- read_csv(
  file = "https://raw.githubusercontent.com/juka19/tad_assignment3/main/data/data_11_28.csv"
)
## New names:
## Rows: 920 Columns: 13
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (3): subjects, summary, policy_area dbl (8): ...1, ...2, ...3, Unnamed: 0, bill
## number, cosponsor_D_perc, cospo... date (2): latest_action, date
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
## * `...1` -> `...2`
## * `...2` -> `...3`
# Label each bill by the party that dominates its cosponsors:
#   - "Democrat"   when the Democratic cosponsor share exceeds 0.66
#   - "Republican" when the Republican cosponsor share exceeds 0.66
#   - "Both"       when neither party passes that threshold
# A missing share propagates as NA, exactly as with a plain nested ifelse().
df$party <- with(
  df,
  ifelse(
    cosponsor_D_perc > 0.66,
    "Democrat",
    ifelse(cosponsor_R_perc > 0.66, "Republican", "Both")
  )
)
# Distribution of the Democratic cosponsor share across bills: a density-scaled
# histogram with a kernel density estimate drawn on top of it.
ggplot(df, aes(x = cosponsor_D_perc)) +
  # after_stat(density) replaces the deprecated `..density..` notation.
  # bins = 30 is the stat_bin() default; stating it keeps the plot unchanged
  # while silencing the "Pick better value with `binwidth`" message.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    colour = "black",
    fill = "white"
  ) +
  geom_density(alpha = 0.1, fill = "blue") +
  labs(
    title = "Density of bill cosponsor party",
    x = "Cosponsor party composition",
    y = "Density",
    # Built with explicit newlines so code indentation cannot leak into the text
    caption = paste(
      "Numbers represent proportion of cosponsors from Democratic party,",
      "so 0.0 represents bills that were fully Republican and 1.0 represents",
      "bills that were fully Democrat.",
      sep = "\n"
    )
  ) +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# quanteda's corpus() reads the document text from a column called "text",
# so the bill summary column is renamed before the corpus is built.
df_corp <- rename(df, text = summary)
corp <- corpus(df_corp)
## Warning: NA is replaced by empty string
## Creating a dfm from the corpus
# Pipeline: tokenise (dropping punctuation, numbers and symbols), remove
# English stopwords, lemmatise via the lexicon::hash_lemmas lookup table,
# stem, drop boilerplate legislative terms, then build the document-feature
# matrix.
dfmat <- corp %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE) %>%
  # `pattern` is spelled out in full: the former `patter =` only worked
  # through R's partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  tokens_replace(
    pattern = lexicon::hash_lemmas$token,
    replacement = lexicon::hash_lemmas$lemma
  ) %>%
  tokens_wordstem() %>%
  # Applied after stemming, so the stemmed forms are what actually match here
  tokens_remove(
    pattern = c(
      "sec", "bill", "act", "section", "funds", "shall", "must", "use",
      "author", "fund", "provid", "program", "requir", "divis", "titl",
      "appropri", "specifi"
    )
  ) %>%
  dfm()
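# Open question: should lemmatization and stemming be kept? Both steps are
# currently active in the pipeline above; they are repeated here for reference:
# tokens_replace(pattern = lexicon::hash_lemmas$token, replacement = lexicon::hash_lemmas$lemma) %>%
# tokens_wordstem() %>%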
## Most common words in the corpus
# Total count of every feature across all documents, largest first; show the
# 20 most frequent.
head(sort(colSums(dfmat), decreasing = TRUE), n = 20)
## servic nation state prohibit includ feder depart certain
## 2407 2027 1966 1839 1805 1663 1582 1470
## report develop agenc relat health permit may activ
## 1445 1309 1255 1232 1209 1189 1188 1187
## oper congress u.s secur
## 1156 1156 1126 1124
# Word cloud for the 115th Congress: keep only documents whose `session`
# docvar equals 115, then plot at most 300 words.
dfmat_115 <- dfmat %>% dfm_subset(session == 115)
textplot_wordcloud(x = dfmat_115, max_words = 300)
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## necessari could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : emerg
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## standard could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## corpor could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## assistanc could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## indian could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## mission could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## measur could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## inspector could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## record could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## coordin could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## capabl could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : order
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## treatment could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## intern could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## school could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## special could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : doe
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## period could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## condit could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## result could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## nuclear could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## strategi could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## recommend could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## investig could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## privat could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## reauthor could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## environment could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## agenci could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## distribut could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : high
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : prior
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## otherwis could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## address could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## educat could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : nbsp
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## promot could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## fy2019 could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## student could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## exempt could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## agricultur could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : usda
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## qualifi could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## licens could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## reimburs could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## balanc could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## competit could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## general could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : labor
## could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## forest could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, :
## demonstr could not be fit on page. It will not be plotted.
## Warning in wordcloud(x, min_size, max_size, min_count, max_words, color, : good
## could not be fit on page. It will not be plotted.
# Word cloud for the 116th Congress: keep only documents whose `session`
# docvar equals 116, then plot at most 300 words.
dfmat_116 <- dfmat %>% dfm_subset(session == 116)
textplot_wordcloud(x = dfmat_116, max_words = 300)
# Word cloud for the 117th Congress: keep only documents whose `session`
# docvar equals 117, then plot at most 300 words.
dfmat_117 <- dfmat %>% dfm_subset(session == 117)
textplot_wordcloud(x = dfmat_117, max_words = 300)
# Word cloud comparing Democrats and Republicans
# Builds one dfm row per `party` label for the comparison cloud.
# The corpus is tokenised explicitly and grouped with dfm_group(): calling
# dfm() directly on a corpus with `remove`, `groups` and tokens() arguments is
# deprecated in quanteda (see the deprecation warnings recorded below, which
# came from that earlier form).
dfmatRepDem <- corp %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(pattern = stopwords("english")) %>%
  dfm_group(groups = corp$party) %>%
  dfm_remove(
    pattern = c("sec", "bill", "act", "section", "funds", "shall", "must", "used")
  ) %>%
  # Keep only features that occur at least three times overall
  dfm_trim(min_termfreq = 3)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'groups' is deprecated; use dfm_group() instead
## Warning: 'remove' is deprecated; use dfm_remove() instead
# Comparison cloud across the party groups of dfmatRepDem, capped at 300
# words; one colour is supplied per group.
textplot_wordcloud(
  x = dfmatRepDem,
  max_words = 300,
  color = c("green", "blue", "red"),
  comparison = TRUE
)
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## debris could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## museum could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## designates could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## specifically could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## additionally could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## weapons could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## operation could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## procurement could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## justice could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## post could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## assessment could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## sustainment could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## civilian could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## reserve could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## juvenile could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## requested could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## biosimilar could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## ensure could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## duty could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## management could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## enter could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## design could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## sexual could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## virginia could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## availability could not be fit on page. It will not be plotted.
# Word cloud by party, 115th Congress
# Corpus restricted to session 115; the summary column supplies the text.
corp_115 <- df %>%
  filter(session == 115) %>%
  rename(text = summary) %>%
  corpus()
# Tokenise explicitly and group with dfm_group(): calling dfm() directly on a
# corpus with `remove`, `groups` and tokens() arguments is deprecated in
# quanteda (the deprecation warnings recorded below came from that earlier
# form).
modelpart15 <- corp_115 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(pattern = stopwords("english")) %>%
  dfm_group(groups = corp_115$party) %>%
  dfm_remove(
    pattern = c("sec", "bill", "act", "section", "funds", "shall", "must", "used")
  ) %>%
  # Keep only features that occur at least three times overall
  dfm_trim(min_termfreq = 3)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'groups' is deprecated; use dfm_group() instead
## Warning: 'remove' is deprecated; use dfm_remove() instead
# Draw the party comparison cloud for the 115th Congress (at most 300 words,
# one colour per group) and keep the call's return value in mp15.
mp15 <- textplot_wordcloud(
  x = modelpart15,
  max_words = 300,
  color = c("green", "blue", "red"),
  comparison = TRUE
)
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## installations could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## applications could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## industrial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## california could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## comprehensive could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## capability could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## delinquency could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## contract could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## merchant could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## fy2020 could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## policies could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## operation could not be fit on page. It will not be plotted.
# The cloud is already drawn by the textplot_wordcloud() call that created
# mp15. As the listing recorded below shows, the stored value is only a list
# of graphics parameters, so auto-printing it adds noise rather than a plot.
# Reference it invisibly instead.
invisible(mp15)
## $xlog
## [1] FALSE
##
## $ylog
## [1] FALSE
##
## $adj
## [1] 0.5
##
## $ann
## [1] TRUE
##
## $ask
## [1] FALSE
##
## $bg
## [1] "white"
##
## $bty
## [1] "o"
##
## $cex
## [1] 1
##
## $cex.axis
## [1] 1
##
## $cex.lab
## [1] 1
##
## $cex.main
## [1] 1.2
##
## $cex.sub
## [1] 1
##
## $col
## [1] "black"
##
## $col.axis
## [1] "black"
##
## $col.lab
## [1] "black"
##
## $col.main
## [1] "black"
##
## $col.sub
## [1] "black"
##
## $crt
## [1] 0
##
## $err
## [1] 0
##
## $family
## [1] ""
##
## $fg
## [1] "black"
##
## $fig
## [1] 0 1 0 1
##
## $fin
## [1] 6.999999 4.999999
##
## $font
## [1] 1
##
## $font.axis
## [1] 1
##
## $font.lab
## [1] 1
##
## $font.main
## [1] 2
##
## $font.sub
## [1] 1
##
## $lab
## [1] 5 5 7
##
## $las
## [1] 0
##
## $lend
## [1] "round"
##
## $lheight
## [1] 1
##
## $ljoin
## [1] "round"
##
## $lmitre
## [1] 10
##
## $lty
## [1] "solid"
##
## $lwd
## [1] 1
##
## $mai
## [1] 0 0 0 0
##
## $mar
## [1] 5.1 4.1 4.1 2.1
##
## $mex
## [1] 1
##
## $mfcol
## [1] 1 1
##
## $mfg
## [1] 1 1 1 1
##
## $mfrow
## [1] 1 1
##
## $mgp
## [1] 3 1 0
##
## $mkh
## [1] 0.001
##
## $new
## [1] TRUE
##
## $oma
## [1] 0 0 0 0
##
## $omd
## [1] 0 1 0 1
##
## $omi
## [1] 0 0 0 0
##
## $pch
## [1] 1
##
## $pin
## [1] 5.759999 3.159999
##
## $plt
## [1] 0.08857144 0.91142856 0.18400002 0.81599998
##
## $ps
## [1] 12
##
## $pty
## [1] "m"
##
## $smo
## [1] 1
##
## $srt
## [1] 0
##
## $tck
## [1] NA
##
## $tcl
## [1] -0.5
##
## $usr
## [1] -0.4072 1.4072 -0.1480 1.1480
##
## $xaxp
## [1] 0 1 5
##
## $xaxs
## [1] "r"
##
## $xaxt
## [1] "s"
##
## $xpd
## [1] FALSE
##
## $yaxp
## [1] 0 1 5
##
## $yaxs
## [1] "r"
##
## $yaxt
## [1] "s"
##
## $ylbias
## [1] 0.2
# Word cloud by party, 116th Congress
# Corpus restricted to session 116; the summary column supplies the text.
corp_116 <- df %>%
  filter(session == 116) %>%
  rename(text = summary) %>%
  corpus()
# Tokenise explicitly and group with dfm_group(): calling dfm() directly on a
# corpus with `remove`, `groups` and tokens() arguments is deprecated in
# quanteda (the deprecation warnings recorded below came from that earlier
# form).
modelpart16 <- corp_116 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_remove(pattern = stopwords("english")) %>%
  dfm_group(groups = corp_116$party) %>%
  dfm_remove(
    pattern = c("sec", "bill", "act", "section", "funds", "shall", "must", "used")
  ) %>%
  # Keep only features that occur at least three times overall
  dfm_trim(min_termfreq = 3)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'groups' is deprecated; use dfm_group() instead
## Warning: 'remove' is deprecated; use dfm_remove() instead
# Draw the party comparison cloud for the 116th Congress (at most 300 words,
# one colour per group) and keep the call's return value in mp16.
mp16 <- textplot_wordcloud(
  x = modelpart16,
  max_words = 300,
  color = c("green", "blue", "red"),
  comparison = TRUE
)
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## regarding could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## community could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## members could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## payee could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## specialists could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## report could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## courthouse could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## years could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## medicaid could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## representative could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## access could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## identifying could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## building could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## payment could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## department could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## development could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## identify could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## whether could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## among could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## agriculture could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## status could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## update could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## financial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## lessee could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## high-security could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## generative could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## issuer could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## current could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## protection could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## specifically could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## methods could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## improper could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## islands could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## public could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## accountability could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## educational could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## support could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## exemption could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## charge could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## economic could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## memorial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## improving could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## three could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## develop could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## entering could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## board could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## standards could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## agricultural could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## homeland could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## technology could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## gsa could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## interests could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## highest-level could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## sensitivity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## purity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## outputs could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## boys could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## apprenticeship could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## trusted could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## confidential could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## merchant could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## exercise could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## immediate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## narcotics could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## national could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## west could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## arkansas could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## travel could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## developing could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## devices could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## lawfully could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## percentage could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## provide could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## tanf could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## remote could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## port could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## world could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## black could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## obtain could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## medal could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## plans could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## accounting could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## screening could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## working could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## appointment could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## steps could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## owner could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## providers could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## detection could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## protecting could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## securities could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## authority could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## retirement could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## international could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## similar could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## open could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## group could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## columbia could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## integrity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## contain could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## long-term could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## maintain could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## reprogramming could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## community-based could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## science could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## bureau could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## commercial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## reduces could not be fit on page. It will not be plotted.
# Auto-print the object stored in mp16. The `## $xlog`, `## $mar`, ... listing
# that follows is that printed value (a named list of settings), not a plot.
# NOTE(review): mp16 is assigned outside this section; presumably it holds the
# return value of a textplot_wordcloud() call, as mp17 does further down.
mp16
## $xlog
## [1] FALSE
##
## $ylog
## [1] FALSE
##
## $adj
## [1] 0.5
##
## $ann
## [1] TRUE
##
## $ask
## [1] FALSE
##
## $bg
## [1] "white"
##
## $bty
## [1] "o"
##
## $cex
## [1] 1
##
## $cex.axis
## [1] 1
##
## $cex.lab
## [1] 1
##
## $cex.main
## [1] 1.2
##
## $cex.sub
## [1] 1
##
## $col
## [1] "black"
##
## $col.axis
## [1] "black"
##
## $col.lab
## [1] "black"
##
## $col.main
## [1] "black"
##
## $col.sub
## [1] "black"
##
## $crt
## [1] 0
##
## $err
## [1] 0
##
## $family
## [1] ""
##
## $fg
## [1] "black"
##
## $fig
## [1] 0 1 0 1
##
## $fin
## [1] 6.999999 4.999999
##
## $font
## [1] 1
##
## $font.axis
## [1] 1
##
## $font.lab
## [1] 1
##
## $font.main
## [1] 2
##
## $font.sub
## [1] 1
##
## $lab
## [1] 5 5 7
##
## $las
## [1] 0
##
## $lend
## [1] "round"
##
## $lheight
## [1] 1
##
## $ljoin
## [1] "round"
##
## $lmitre
## [1] 10
##
## $lty
## [1] "solid"
##
## $lwd
## [1] 1
##
## $mai
## [1] 0 0 0 0
##
## $mar
## [1] 5.1 4.1 4.1 2.1
##
## $mex
## [1] 1
##
## $mfcol
## [1] 1 1
##
## $mfg
## [1] 1 1 1 1
##
## $mfrow
## [1] 1 1
##
## $mgp
## [1] 3 1 0
##
## $mkh
## [1] 0.001
##
## $new
## [1] TRUE
##
## $oma
## [1] 0 0 0 0
##
## $omd
## [1] 0 1 0 1
##
## $omi
## [1] 0 0 0 0
##
## $pch
## [1] 1
##
## $pin
## [1] 5.759999 3.159999
##
## $plt
## [1] 0.08857144 0.91142856 0.18400002 0.81599998
##
## $ps
## [1] 12
##
## $pty
## [1] "m"
##
## $smo
## [1] 1
##
## $srt
## [1] 0
##
## $tck
## [1] NA
##
## $tcl
## [1] -0.5
##
## $usr
## [1] -0.4072 1.4072 -0.1480 1.1480
##
## $xaxp
## [1] 0 1 5
##
## $xaxs
## [1] "r"
##
## $xaxt
## [1] "s"
##
## $xpd
## [1] FALSE
##
## $yaxp
## [1] 0 1 5
##
## $yaxs
## [1] "r"
##
## $yaxt
## [1] "s"
##
## $ylbias
## [1] 0.2
#Wordcloud congress parties 117
# Corpus restricted to bills of session 117. `summary` is renamed to `text`,
# the column name used for the document text when the corpus is built here.
corp_117 <- df %>%
  filter(session == 117) %>%
  rename(text = summary) %>%
  corpus()
## Warning: NA is replaced by empty string
# Document-feature matrix with one row per party group. Tokenisation options
# are passed to tokens() and grouping / feature removal use dfm_group() and
# dfm_remove(), replacing the deprecated `remove =` / `groups =` arguments of
# dfm() on a corpus. Features occurring fewer than 3 times are dropped.
modelpart17 <- corp_117 %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_group(groups = party) %>%
  dfm_remove(pattern = stopwords("english")) %>%
  dfm_remove(
    pattern = c("sec", "bill", "act", "section", "funds", "shall", "must", "used")
  ) %>%
  dfm_trim(min_termfreq = 3)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'groups' is deprecated; use dfm_group() instead
## Warning: 'remove' is deprecated; use dfm_remove() instead
# Comparison wordcloud of the party-grouped dfm for session 117, limited to
# 300 words, with one colour per group. The call's return value is kept in
# mp17.
mp17 <- textplot_wordcloud(
  modelpart17,
  comparison = TRUE,
  max_words = 300,
  color = c("green", "blue", "red")
)
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## expedited could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## countries could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## mountain could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## consolidation could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## cacfp could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## government could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## animals could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## management could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## charles could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## peace could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## original could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## waivers could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## national could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## human could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## goods could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## meals could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## location could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## produced could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## commonly could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## program could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## bankruptcy could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## south could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## entities could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## biological could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## businesses could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## registration could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## employment could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## workforce could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## modification could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## small could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## working could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## acquisition could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## trafficking could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## business could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## contract could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## schools could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## participation could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## student could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## educational could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## international could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## lifespan could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## respite could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## humanity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## exchanged could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## artificial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## snacks could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## taiwan's could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## wha could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## china could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## shadow could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## wolves could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## methamphetamine could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## fact could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## sheets could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## justification could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## jimmy could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## carter could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## separate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## cents could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## meal could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## maximum could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## rights could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## least could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## performing could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## certificate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## airport could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## patents could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## class could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## major could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## include could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## minority could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## defense could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## homeland could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## reimbursement could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## using could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## joint could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## importation could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## lease could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## requirements could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## retraining could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## best could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## appropriate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## colonel could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## main could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## patent could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## impact could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## education could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## address could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## commemorative could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## concerning could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## projects could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## historical could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## intelligence could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## strategy could not be fit on page. It will not be plotted.
# Auto-print mp17, i.e. the value returned by the textplot_wordcloud() call
# above. The `## $xlog`, `## $mar`, ... listing that follows is that printed
# value (a named list of settings), not the wordcloud itself.
mp17
## $xlog
## [1] FALSE
##
## $ylog
## [1] FALSE
##
## $adj
## [1] 0.5
##
## $ann
## [1] TRUE
##
## $ask
## [1] FALSE
##
## $bg
## [1] "white"
##
## $bty
## [1] "o"
##
## $cex
## [1] 1
##
## $cex.axis
## [1] 1
##
## $cex.lab
## [1] 1
##
## $cex.main
## [1] 1.2
##
## $cex.sub
## [1] 1
##
## $col
## [1] "black"
##
## $col.axis
## [1] "black"
##
## $col.lab
## [1] "black"
##
## $col.main
## [1] "black"
##
## $col.sub
## [1] "black"
##
## $crt
## [1] 0
##
## $err
## [1] 0
##
## $family
## [1] ""
##
## $fg
## [1] "black"
##
## $fig
## [1] 0 1 0 1
##
## $fin
## [1] 6.999999 4.999999
##
## $font
## [1] 1
##
## $font.axis
## [1] 1
##
## $font.lab
## [1] 1
##
## $font.main
## [1] 2
##
## $font.sub
## [1] 1
##
## $lab
## [1] 5 5 7
##
## $las
## [1] 0
##
## $lend
## [1] "round"
##
## $lheight
## [1] 1
##
## $ljoin
## [1] "round"
##
## $lmitre
## [1] 10
##
## $lty
## [1] "solid"
##
## $lwd
## [1] 1
##
## $mai
## [1] 0 0 0 0
##
## $mar
## [1] 5.1 4.1 4.1 2.1
##
## $mex
## [1] 1
##
## $mfcol
## [1] 1 1
##
## $mfg
## [1] 1 1 1 1
##
## $mfrow
## [1] 1 1
##
## $mgp
## [1] 3 1 0
##
## $mkh
## [1] 0.001
##
## $new
## [1] TRUE
##
## $oma
## [1] 0 0 0 0
##
## $omd
## [1] 0 1 0 1
##
## $omi
## [1] 0 0 0 0
##
## $pch
## [1] 1
##
## $pin
## [1] 5.759999 3.159999
##
## $plt
## [1] 0.08857144 0.91142856 0.18400002 0.81599998
##
## $ps
## [1] 12
##
## $pty
## [1] "m"
##
## $smo
## [1] 1
##
## $srt
## [1] 0
##
## $tck
## [1] NA
##
## $tcl
## [1] -0.5
##
## $usr
## [1] -0.4072 1.4072 -0.1480 1.1480
##
## $xaxp
## [1] 0 1 5
##
## $xaxs
## [1] "r"
##
## $xaxt
## [1] "s"
##
## $xpd
## [1] FALSE
##
## $yaxp
## [1] 0 1 5
##
## $yaxs
## [1] "r"
##
## $yaxt
## [1] "s"
##
## $ylbias
## [1] 0.2
#Wordcloud comparing Congresses
# Document-feature matrix with one row per congressional session. Tokenisation
# options go to tokens() and grouping / feature removal use dfm_group() and
# dfm_remove(), replacing the deprecated `remove =` / `groups =` arguments of
# dfm() on a corpus. Features occurring fewer than 3 times are dropped.
dfmatCon <- corp %>%
  tokens(remove_numbers = TRUE, remove_punct = TRUE) %>%
  dfm() %>%
  dfm_group(groups = session) %>%
  dfm_remove(pattern = stopwords("english")) %>%
  dfm_remove(
    pattern = c("sec", "bill", "act", "section", "funds", "shall", "must", "used")
  ) %>%
  dfm_trim(min_termfreq = 3)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'groups' is deprecated; use dfm_group() instead
## Warning: 'remove' is deprecated; use dfm_remove() instead
# Comparison wordcloud of the session-grouped dfm, limited to 300 words.
textplot_wordcloud(
  dfmatCon,
  comparison = TRUE,
  max_words = 300,
  color = c("blue", "red")
)
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## prescription could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## clinic could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## benefit could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## home could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## women could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## climate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## products could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## january could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## electricity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## communities could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## plan could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## cybersecurity could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## infrastructure could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## rebates could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## fgm could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## low-income could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## park could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## biosimilar could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## children could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## substances could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## schools could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## capitol could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## homeless could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## usda could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## associated could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## counseling could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## financial could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## qualifying could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## served could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## sexual could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## leave could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## rural could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## historic could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## post could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## coverage could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## rebate could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## cancer could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## locations could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## electric could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## presumption could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## fda's could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## materials could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## institution could not be fit on page. It will not be plotted.
## Warning in wordcloud_comparison(x, min_size, max_size, min_count, max_words, :
## conduct could not be fit on page. It will not be plotted.
# Overall most common words: first 20 rows of the corpus-wide frequency table
tfreq <- head(textstat_frequency(dfmat), n = 20)
# Use the table's own row order as the factor levels so the y axis keeps that
# order instead of sorting the features alphabetically.
tfreq[["feature"]] <- factor(tfreq[["feature"]], levels = tfreq[["feature"]])
ggplot(data = tfreq, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
## Most common words by party
# Republicans: keep only documents whose `party` docvar is "Republican", then
# take the first 20 rows of their frequency table.
dfmat_r <- dfm_subset(dfmat, party == "Republican")
tfreq_r <- head(textstat_frequency(dfmat_r), n = 20)
# Keep the table's row order on the y axis.
tfreq_r[["feature"]] <- factor(tfreq_r[["feature"]], levels = tfreq_r[["feature"]])
ggplot(data = tfreq_r, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency for Republican Bills",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
# Democrats: keep only documents whose `party` docvar is "Democrat", then take
# the first 20 rows of their frequency table.
dfmat_d <- dfm_subset(dfmat, party == "Democrat")
tfreq_d <- head(textstat_frequency(dfmat_d), n = 20)
# Keep the table's row order on the y axis.
tfreq_d[["feature"]] <- factor(tfreq_d[["feature"]], levels = tfreq_d[["feature"]])
ggplot(data = tfreq_d, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency for Democrat Bills",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
# Keyness analysis: which features distinguish Republican from Democrat bills.
# Only documents with a single-party label are kept; any other `party` value
# (e.g. "Both") is excluded.
party_corp <- corpus_subset(corp, party %in% c("Democrat", "Republican"))
# Create a dfm grouped by party.
# Preprocessing order: tokenise -> drop English stopwords -> lemmatise with the
# lexicon::hash_lemmas table -> stem -> drop corpus-specific boilerplate terms
# -> merge all documents of a party into one group.
# `pattern` is spelled out in full rather than relying on partial argument
# matching (`patter =`).
party_dfm <- party_corp %>%
  tokens(remove_punct = TRUE, remove_numbers = TRUE, remove_symbols = TRUE) %>%
  tokens_remove(pattern = stopwords("en")) %>%
  tokens_replace(
    pattern = lexicon::hash_lemmas$token,
    replacement = lexicon::hash_lemmas$lemma
  ) %>%
  tokens_wordstem() %>%
  tokens_remove(
    pattern = c(
      "sec", "bill", "act", "section", "funds", "shall", "must", "use",
      "author", "may"
    )
  ) %>%
  tokens_group(groups = party) %>%
  dfm()
# Calculate keyness with the "Republican" group as target; the remaining group
# serves as the reference.
result_keyness <- textstat_keyness(party_dfm, target = "Republican")
# Plot estimated word keyness
textplot_keyness(result_keyness)
# Most common words by Congressional session 115: first 20 rows of the
# frequency table computed from dfmat_115.
tfreq_115 <- head(textstat_frequency(dfmat_115), n = 20)
# Keep the table's row order on the y axis.
tfreq_115[["feature"]] <- factor(
  tfreq_115[["feature"]],
  levels = tfreq_115[["feature"]]
)
ggplot(data = tfreq_115, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency for 115 session (2017 - 2018)",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
# Most common words by Congressional session 116: first 20 rows of the
# frequency table computed from dfmat_116.
tfreq_116 <- head(textstat_frequency(dfmat_116), n = 20)
# Keep the table's row order on the y axis.
tfreq_116[["feature"]] <- factor(
  tfreq_116[["feature"]],
  levels = tfreq_116[["feature"]]
)
ggplot(data = tfreq_116, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency for 116 session (2019 - 2020)",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
# Most common words by Congressional session 117: first 20 rows of the
# frequency table computed from dfmat_117.
tfreq_117 <- head(textstat_frequency(dfmat_117), n = 20)
# Keep the table's row order on the y axis.
tfreq_117[["feature"]] <- factor(
  tfreq_117[["feature"]],
  levels = tfreq_117[["feature"]]
)
ggplot(data = tfreq_117, mapping = aes(x = frequency, y = feature)) +
  geom_col() +
  labs(
    title = "Feature (word) frequency for 117 session (2021 - 2022)",
    x = "Frequency",
    y = "Feature"
  ) +
  theme_minimal()
## Comparing subgroups
# Give every bill a text label for its session ("session_115" etc.); any other
# session value becomes NA. The labels become column names after the pivot.
df <- mutate(
  df,
  session_written = case_when(
    session == 115 ~ "session_115",
    session == 116 ~ "session_116",
    session == 117 ~ "session_117",
    TRUE ~ NA_character_
  )
)
# Per-session feature frequencies, reshaped to one row per feature and one
# column per session label.
# NOTE(review): grouping by df$session_written assumes the documents in dfmat
# are in the same order as the rows of df - confirm.
ytfreq <- pivot_wider(
  textstat_frequency(dfmat, groups = df$session_written),
  id_cols = feature,
  names_from = group,
  values_from = frequency
)
# Comparing sessions 115 and 116
# Plot #1: each point is one feature, positioned by its frequency in session
# 116 (x) and in session 115 (y).
ggplot(data = ytfreq, mapping = aes(x = session_116, y = session_115)) +
  geom_point() +
  theme_minimal()
## Warning: Removed 5263 rows containing missing values (geom_point).
# Plot #2: colour each feature by its change between sessions.
# The change is the log of the frequency ratio 116 / 115: positive when the
# feature is more frequent in session 116, negative when less.
ytfreq[["change_115_116"]] <- log(
  ytfreq[["session_116"]] / ytfreq[["session_115"]]
)
# Largest absolute change, used to make the colour scale symmetric around 0.
max_change <- max(abs(ytfreq[["change_115_116"]]), na.rm = TRUE)
ggplot(ytfreq, aes(x = session_116, y = session_115, fill = change_115_116)) +
  geom_point(shape = 21, color = "grey") +
  scale_fill_gradientn(
    colors = c("#4575b4", "white", "#d73027"),
    values = scales::rescale(c(-max_change, 0, max_change)),
    limits = c(-max_change, max_change)
  ) +
  theme_bw()
## Warning: Removed 5263 rows containing missing values (geom_point).
# Plot 3: labelling words with the biggest change.
# A feature is labelled when its absolute log-ratio exceeds 0.4 and the larger
# of its two session frequencies exceeds 2.5. pmax() takes the element-wise
# maximum of the two columns, so no rowwise() grouping is needed and the
# result is a plain (ungrouped) table.
labels <- ytfreq %>%
  mutate(max_value = pmax(session_116, session_115)) %>%
  filter(abs(change_115_116) > 0.4 & max_value > 2.5)
# Same coloured scatter as plot #2 (relies on max_change computed there), with
# repelled text labels for the selected features only.
ggplot(ytfreq, aes(x = session_116, y = session_115, fill = change_115_116)) +
  geom_point(color = "grey", shape = 21) +
  scale_fill_gradientn(
    colors = c("#4575b4", "white", "#d73027"),
    values = scales::rescale(c(-max_change, 0, max_change)),
    limits = c(-max_change, max_change)
  ) +
  theme_bw() +
  geom_label_repel(data = labels, aes(label = feature), min.segment.length = 0)
## Warning: Removed 5263 rows containing missing values (geom_point).
## Warning: ggrepel: 2696 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#Comparing sessions 116 and 117
ytfreq %>%
  ggplot(aes(x = session_117, y = session_116)) +
  geom_point() +
  theme_minimal()
## Warning: Removed 6716 rows containing missing values (geom_point).
#Plot #2: coloring changes by session
# Log ratio of the session 117 frequency to the session 116 frequency:
# positive when a feature is more frequent in 117, negative when less.
ytfreq <- ytfreq %>%
  mutate(change_116_117 = log(session_117 / session_116))
# Largest absolute change; used to make the colour scale symmetric around 0.
max_change <- max(abs(ytfreq[["change_116_117"]]), na.rm = TRUE)
ytfreq %>%
  ggplot(aes(x = session_117, y = session_116, fill = change_116_117)) +
  geom_point(color = "grey", shape = 21) +
  scale_fill_gradientn(
    colors = c("#4575b4", "white", "#d73027"),
    values = scales::rescale(c(-max_change, 0, max_change)),
    limits = c(-max_change, max_change)
  ) +
  theme_bw()
## Warning: Removed 6716 rows containing missing values (geom_point).
#Plot 3: labeling words with biggest change
# Keep only the features worth labelling: absolute log frequency ratio above
# 0.4 and the larger of the two session frequencies above 2.5.
# pmax() takes the element-wise maximum of the two columns in one vectorised
# call, so no rowwise() grouping is needed (and none is left on `labels`).
# Like max(), it yields NA when either frequency is NA, and filter() drops
# those rows.
labels <- ytfreq %>%
  mutate(max_value = pmax(session_117, session_116)) %>%
  filter(abs(change_116_117) > 0.4 & max_value > 2.5)
# Same coloured scatter as plot 2, with repelled text labels for the selected
# features. `max_change` is the symmetric colour-scale bound computed above.
ggplot(ytfreq, aes(x = session_117, y = session_116, fill = change_116_117)) +
  geom_point(color = "grey", shape = 21) +
  scale_fill_gradientn(
    colors = c("#4575b4", "white", "#d73027"),
    values = scales::rescale(c(max_change * -1, 0, max_change)),
    limits = c(max_change * -1, max_change)
  ) +
  theme_bw() +
  geom_label_repel(data = labels, aes(label = feature), min.segment.length = 0)
## Warning: Removed 6716 rows containing missing values (geom_point).
## Warning: ggrepel: 1662 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# sims <- textstat_simil(dfmat, method="cosine")
#
# sim_df <- as.data.frame(sims, upper = TRUE)
#
# sim_df
##Dimensionality plotting
# Build a document-feature matrix straight from the raw bill summaries
# (punctuation and English stopwords removed, features occurring fewer than
# 5 times dropped) and project it to two dimensions with umap().
corp2 <- corpus(df$summary)
## Warning: NA is replaced by empty string
dfmat2 <- corp2 %>%
  tokens(remove_punct = TRUE) %>%
  # `pattern` is spelled out in full; the previous `patter =` only worked
  # through partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  dfm() %>%
  dfm_trim(min_termfreq = 5)
embeddings <- umap(as.matrix(dfmat2))
# The corpus was built from df$summary in row order, so embedding row i
# belongs to bill i; store the two coordinates on df for plotting.
df$x <- embeddings[, 1]
df$y <- embeddings[, 2]
# Fill colour for each value of `party`.
colordict <- c(Democrat = "blue", Republican = "red", Both = "yellow")
# Scatter of the 2-D embedding, one point per bill, filled by party label.
p <- df %>%
  ggplot(aes(x = x, y = y, fill = party)) +
  geom_point(color = "grey", shape = 21, size = 0.5) +
  scale_fill_manual(values = colordict) +
  theme_bw()
# Static version first, then the interactive plotly version.
p
ggplotly(p)
# Repeat the embedding on single-party bills only: keep bills whose cosponsors
# are 100% Democrat ("Dem") or 100% Republican ("Rep"), drop everything else.
df1 <- df %>%
  mutate(party_full = ifelse(cosponsor_D_perc == 1.0, "Dem",
    ifelse(cosponsor_R_perc == 1.0, "Rep", NA)
  )) %>%
  drop_na(party_full)
# Same preprocessing as for the full data set: remove punctuation and English
# stopwords, drop features occurring fewer than 5 times.
corp3 <- corpus(df1$summary)
dfmat3 <- corp3 %>%
  tokens(remove_punct = TRUE) %>%
  # `pattern` is spelled out in full; the previous `patter =` only worked
  # through partial argument matching.
  tokens_remove(pattern = stopwords("en")) %>%
  dfm() %>%
  dfm_trim(min_termfreq = 5)
embeddings2 <- umap(as.matrix(dfmat3))
# Embedding rows follow the row order of df1; overwrite the coordinates that
# df1 inherited from df with the new ones.
df1$x <- embeddings2[, 1]
df1$y <- embeddings2[, 2]
# Fill colours keyed by the values of `party` (not `party_full`), which is
# the column mapped to fill below.
colordict2 <- c(Democrat = "blue", Republican = "red")
# Scatter of the single-party embedding, one point per bill.
j <- df1 %>%
  ggplot(aes(x = x, y = y, fill = party)) +
  geom_point(color = "grey", shape = 21, size = 0.5) +
  scale_fill_manual(values = colordict2) +
  theme_bw()
# Static version first, then the interactive plotly version.
j
ggplotly(j)
##Topic modeling
#Topics for session 115 (2017 - 2018)
# Fit a 5-topic LDA model on the session 115 document-feature matrix.
lda_115 <- LDA(dfmat_115, k = 5)
# Per-topic word probabilities (beta): keep the 10 highest-beta terms of each
# topic, ordered by topic and then by decreasing beta.
topic_words_115 <- lda_115 %>%
  tidy(matrix = "beta") %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 10) %>%
  ungroup() %>%
  arrange(topic, desc(beta))
topic_words_115
## # A tibble: 50 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 prohibit 0.0166
## 2 1 permit 0.0115
## 3 1 servic 0.0105
## 4 1 nation 0.00919
## 5 1 feder 0.00786
## 6 1 includ 0.00783
## 7 1 agenc 0.00734
## 8 1 state 0.00723
## 9 1 congress 0.00691
## 10 1 depart 0.00674
## # ... with 40 more rows
# One bar-chart facet per topic; reorder_within() / scale_y_reordered() sort
# the terms by beta separately inside each facet.
topic_words_115 %>%
  mutate(term = reorder_within(term, by = beta, within = topic)) %>%
  ggplot(aes(x = beta, y = term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered() +
  theme_minimal()
#Topics for session 116 (2019 - 2020)
# Fit a 5-topic LDA model on the session 116 document-feature matrix.
lda_116 <- LDA(dfmat_116, k = 5)
# Per-topic word probabilities (beta): keep the 10 highest-beta terms of each
# topic, ordered by topic and then by decreasing beta.
topic_words_116 <- lda_116 %>%
  tidy(matrix = "beta") %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 10) %>%
  ungroup() %>%
  arrange(topic, desc(beta))
topic_words_116
## # A tibble: 50 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 prohibit 0.0248
## 2 1 permit 0.0146
## 3 1 servic 0.0114
## 4 1 feder 0.0114
## 5 1 agenc 0.0105
## 6 1 oper 0.00960
## 7 1 congress 0.00916
## 8 1 nation 0.00906
## 9 1 transfer 0.00848
## 10 1 certain 0.00769
## # ... with 40 more rows
# One bar-chart facet per topic; reorder_within() / scale_y_reordered() sort
# the terms by beta separately inside each facet.
topic_words_116 %>%
  mutate(term = reorder_within(term, by = beta, within = topic)) %>%
  ggplot(aes(x = beta, y = term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered() +
  theme_minimal()
#Topics for session 117 (2021 - 2022)
# Remove the all-zero rows of the dfm (documents left with no features) before
# fitting, i.e. keep only documents with at least one token.
# ntoken() returns the per-document token total directly from the sparse dfm,
# avoiding the dense coercion that apply(dfmat_117, 1, sum) performs.
# NOTE(review): topicmodels::LDA() is expected to reject input containing
# all-zero rows, which is presumably why this step exists only here -- confirm.
raw.sum <- ntoken(dfmat_117)
dfmat_117 <- dfmat_117[raw.sum > 0, ]
# Fit a 5-topic LDA model on the remaining documents.
lda_117 <- LDA(dfmat_117, 5)
# Per-topic word probabilities (beta): keep the 10 highest-beta terms of each
# topic, ordered by topic and then by decreasing beta.
topic_words_117 <- tidy(lda_117, matrix = "beta") %>%
  group_by(topic) %>%
  slice_max(beta, n = 10) %>%
  ungroup() %>%
  arrange(topic, -beta)
topic_words_117
## # A tibble: 50 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 feder 0.0131
## 2 1 depart 0.0114
## 3 1 agenc 0.0105
## 4 1 includ 0.00947
## 5 1 nation 0.00866
## 6 1 govern 0.00790
## 7 1 establish 0.00756
## 8 1 may 0.00742
## 9 1 state 0.00706
## 10 1 report 0.00664
## # ... with 40 more rows
# One bar-chart facet per topic; reorder_within() / scale_y_reordered() sort
# the terms by beta separately inside each facet.
topic_words_117 %>%
  mutate(term = reorder_within(term, by = beta, within = topic)) %>%
  ggplot(aes(x = beta, y = term, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  scale_y_reordered() +
  theme_minimal()
##Sentiments
#Loading df with VADER compound scores
# Same bill data as before, with an extra `compound` sentiment column.
summary_sentiment <- read_csv(
  "https://raw.githubusercontent.com/juka19/tad_assignment3/main/data/data_w_vader.csv"
)
## New names:
## Rows: 920 Columns: 18
## -- Column specification
## -------------------------------------------------------- Delimiter: "," chr
## (3): subjects, summary, policy_area dbl (13): ...1, Unnamed: 0, ...3, ...4,
## Unnamed: 0.1, bill number, cosponso... date (2): latest_action, date
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `` -> `...1`
## * `...1` -> `...3`
## * `...2` -> `...4`
# Distribution of the compound sentiment score: density-scaled histogram with
# a kernel density estimate drawn over it.
summary_sentiment %>%
  ggplot(aes(x = compound)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(alpha = 0.1, fill = "blue") +
  labs(
    title = "Density of compound sentiment scores",
    x = "Sentiment scores",
    y = "Density"
  ) +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).
# Label each bill by the party holding more than 66% of its cosponsors;
# bills without such a majority are "Both".
summary_sentiment$party <- ifelse(
  summary_sentiment$cosponsor_D_perc > 0.66, "Democrat",
  ifelse(summary_sentiment$cosponsor_R_perc > 0.66, "Republican", "Both")
)
# Mean compound score per party and day, reshaped to one row per date with
# one column per party.
wide_sentiment <- summary_sentiment %>%
  # Drop bills without a sentiment score first: a single NA would otherwise
  # turn the mean of every other bill of that party on that day into NA.
  filter(!is.na(compound)) %>%
  group_by(party, date) %>%
  summarise(score = mean(compound), .groups = "drop") %>%
  pivot_wider(names_from = party, values_from = score) %>%
  # Keep only the clear-majority parties. any_of() tolerates the "Both" or
  # "NA" column being absent, where -c("Both", "NA") would raise an error.
  select(-any_of(c("Both", "NA")))
## `summarise()` has grouped output by 'party'. You can override using the
## `.groups` argument.
# Calendar of every day from 2017-01-01 to 2022-12-31, so that days without
# any bill still get a row (with an NA score) after the join.
days <- data.frame(
  date = seq(as.Date("2017-01-01"), as.Date("2022-12-31"), by = 1)
)
# Long table with one row per day and party, plus a trailing 7-day rolling
# mean of the daily score computed separately for each party.
daily_sentiment <- days %>%
  left_join(wide_sentiment, by = "date") %>%
  pivot_longer(cols = -date, names_to = "party", values_to = "score") %>%
  group_by(party) %>%
  arrange(date) %>%
  # na.rm = TRUE averages whatever scores exist inside each 7-day window.
  # Without it any window containing a day with no score is NA; most calendar
  # days have no score, so score7 was NA on every row and the rolling line was
  # never drawn. Windows with no score at all evaluate to NaN.
  mutate(score7 = data.table::frollmean(score, 7, na.rm = TRUE)) %>%
  ungroup()
## Joining, by = "date"
# Daily mean sentiment per party: raw daily scores as points, the 7-day
# rolling mean as a line, and a linear trend fitted to the daily scores.
daily_sentiment %>%
  ggplot(aes(x = date, y = score, colour = party)) +
  geom_point(size = 1) +
  geom_line(aes(y = score7)) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4074 rows containing non-finite values (stat_smooth).
## Warning: Removed 4074 rows containing missing values (geom_point).
## Warning: Removed 4382 row(s) containing missing values (geom_path).
# Flag bills whose cosponsors all belong to one party: "Dem" when 100%
# Democrat, "Rep" when 100% Republican, NA otherwise.
summary_sentiment$party_full <- ifelse(
  summary_sentiment$cosponsor_D_perc == 1.0, "Dem",
  ifelse(summary_sentiment$cosponsor_R_perc == 1.0, "Rep", NA)
)
# Mean compound score per single-party group and day, reshaped to one row per
# date with one column per group.
wide_sentiment_full <- summary_sentiment %>%
  # Drop mixed-party bills, and bills without a sentiment score: a single NA
  # would otherwise turn the mean of the whole group/day into NA.
  drop_na(party_full, compound) %>%
  group_by(party_full, date) %>%
  summarise(score = mean(compound), .groups = "drop") %>%
  pivot_wider(names_from = party_full, values_from = score)
## `summarise()` has grouped output by 'party_full'. You can override using the
## `.groups` argument.
# Calendar of every day from 2017-01-01 to 2018-12-31 (the window used for
# session 115), so that days without any bill still get a row.
days_115 <- data.frame(
  date = seq(as.Date("2017-01-01"), as.Date("2018-12-31"), by = 1)
)
# Long table with one row per day and single-party group, plus a trailing
# 7-day rolling mean of the daily score computed separately for each group.
daily_sentiment_115 <- days_115 %>%
  left_join(wide_sentiment_full, by = "date") %>%
  pivot_longer(cols = -date, names_to = "party_full", values_to = "score") %>%
  group_by(party_full) %>%
  arrange(date) %>%
  # na.rm = TRUE averages whatever scores exist inside each 7-day window.
  # Without it any window containing a day with no score is NA, which left
  # score7 NA on every row. Windows with no score at all evaluate to NaN.
  mutate(score7 = data.table::frollmean(score, 7, na.rm = TRUE)) %>%
  ungroup()
# Preview the table built above (this previously printed `daily_sentiment`,
# the all-years table, by mistake).
daily_sentiment_115 %>% head()
## # A tibble: 6 x 4
## # Groups: party [2]
## date party score score7
## <date> <chr> <dbl> <dbl>
## 1 2017-01-01 Democrat NA NA
## 2 2017-01-01 Republican NA NA
## 3 2017-01-02 Democrat NA NA
## 4 2017-01-02 Republican NA NA
## 5 2017-01-03 Democrat NA NA
## 6 2017-01-03 Republican NA NA
# Session 115 window: raw daily scores as points, the 7-day rolling mean as a
# line, and a linear trend fitted to the daily scores, per single-party group.
daily_sentiment_115 %>%
  ggplot(aes(x = date, y = score, colour = party_full)) +
  geom_point(size = 1) +
  geom_line(aes(y = score7)) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1402 rows containing non-finite values (stat_smooth).
## Warning: Removed 1402 rows containing missing values (geom_point).
## Warning: Removed 1460 row(s) containing missing values (geom_path).
# Calendar of every day from 2019-01-01 to 2020-12-31 (the window used for
# session 116), so that days without any bill still get a row.
days_116 <- data.frame(
  date = seq(as.Date("2019-01-01"), as.Date("2020-12-31"), by = 1)
)
# Long table with one row per day and single-party group, plus a trailing
# 7-day rolling mean of the daily score computed separately for each group.
daily_sentiment_116 <- days_116 %>%
  left_join(wide_sentiment_full, by = "date") %>%
  pivot_longer(cols = -date, names_to = "party_full", values_to = "score") %>%
  group_by(party_full) %>%
  arrange(date) %>%
  # na.rm = TRUE averages whatever scores exist inside each 7-day window.
  # Without it any window containing a day with no score is NA, which left
  # score7 NA on every row. Windows with no score at all evaluate to NaN.
  mutate(score7 = data.table::frollmean(score, 7, na.rm = TRUE)) %>%
  ungroup()
daily_sentiment_116 %>% head()
## # A tibble: 6 x 4
## # Groups: party_full [2]
## date party_full score score7
## <date> <chr> <dbl> <dbl>
## 1 2019-01-01 Dem NA NA
## 2 2019-01-01 Rep NA NA
## 3 2019-01-02 Dem NA NA
## 4 2019-01-02 Rep NA NA
## 5 2019-01-03 Dem 0.606 NA
## 6 2019-01-03 Rep 0.980 NA
# Session 116 window: raw daily scores as points, the 7-day rolling mean as a
# line, and a linear trend fitted to the daily scores, per single-party group.
daily_sentiment_116 %>%
  ggplot(aes(x = date, y = score, colour = party_full)) +
  geom_point(size = 1) +
  geom_line(aes(y = score7)) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1409 rows containing non-finite values (stat_smooth).
## Warning: Removed 1409 rows containing missing values (geom_point).
## Warning: Removed 1462 row(s) containing missing values (geom_path).
# Calendar of every day from 2021-01-01 to 2022-12-31 (the window used for
# session 117), so that days without any bill still get a row.
days_117 <- data.frame(
  date = seq(as.Date("2021-01-01"), as.Date("2022-12-31"), by = 1)
)
# Long table with one row per day and single-party group, plus a trailing
# 7-day rolling mean of the daily score computed separately for each group.
daily_sentiment_117 <- days_117 %>%
  left_join(wide_sentiment_full, by = "date") %>%
  pivot_longer(cols = -date, names_to = "party_full", values_to = "score") %>%
  group_by(party_full) %>%
  arrange(date) %>%
  # na.rm = TRUE averages whatever scores exist inside each 7-day window.
  # Without it any window containing a day with no score is NA, which left
  # score7 NA on every row. Windows with no score at all evaluate to NaN.
  mutate(score7 = data.table::frollmean(score, 7, na.rm = TRUE)) %>%
  ungroup()
## Joining, by = "date"
# Session 117 window: raw daily scores as points, the 7-day rolling mean as a
# line, and a linear trend fitted to the daily scores, per single-party group.
daily_sentiment_117 %>%
  ggplot(aes(x = date, y = score, colour = party_full)) +
  geom_point(size = 1) +
  geom_line(aes(y = score7)) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1420 rows containing non-finite values (stat_smooth).
## Warning: Removed 1420 rows containing missing values (geom_point).
## Warning: Removed 1460 row(s) containing missing values (geom_path).
##Some machine learning
#Prediction model to see if we can predict whether a bill is Republican or Democrat
#Keep only bills that are over 66% Democrat or Republican
df1 <- filter(df, party != "Both")
# Shuffle the rows before splitting.
df1 <- df1[sample(nrow(df1)), ]
# Binary target stored as a factor: "1" for Republican bills, "0" otherwise.
df1$env <- factor(as.integer(df1$party %in% "Republican"))
# Random 80/20 train/test split.
df_split <- initial_split(df1, prop = 0.8)
train_data <- training(df_split)
test_data <- testing(df_split)
# Text preprocessing: tokenize the summary, keep at most 1000 tokens, and
# weight them by tf-idf.
rec <- recipe(env ~ summary, data = train_data) %>%
  step_tokenize(summary) %>%
  step_tokenfilter(summary, max_tokens = 1000) %>%
  step_tfidf(summary)
# Linear SVM classifier.
model <- svm_linear(mode = "classification")
# Bundle preprocessing and model into one workflow and fit it on the training
# set.
wf <- workflow() %>%
  add_recipe(rec) %>%
  add_model(model)
model_fit <- fit(wf, data = train_data)
# Predicted class for every held-out bill.
test_data$prediction <- predict(model_fit, new_data = test_data)[[".pred_class"]]
# Accuracy, precision, recall and F-measure, scoring the second factor level
# ("1") as the event of interest.
scorer <- metric_set(
  yardstick::accuracy,
  yardstick::precision,
  yardstick::recall,
  yardstick::f_meas
)
scorer(test_data, truth = env, estimate = prediction, event_level = "second")
## # A tibble: 4 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.617
## 2 precision binary 0.630
## 3 recall binary 0.354
## 4 f_meas binary 0.453
#Prediction model to see if we can predict whether a bill focuses on health (as policy area)
# Shuffle all bills before splitting.
df2 <- df[sample(nrow(df)), ]
# Binary target stored as a factor: "1" when policy_area is "Health", "0" for
# every other bill (including those with a missing policy area).
df2$env <- factor(as.integer(df2$policy_area %in% "Health"))
# Random 80/20 train/test split.
df_split <- initial_split(df2, prop = 0.8)
train_data <- training(df_split)
test_data <- testing(df_split)
# Text preprocessing: tokenize the summary, keep at most 1000 tokens, and
# weight them by tf-idf.
rec <- recipe(env ~ summary, data = train_data) %>%
  step_tokenize(summary) %>%
  step_tokenfilter(summary, max_tokens = 1000) %>%
  step_tfidf(summary)
# Linear SVM classifier.
model <- svm_linear(mode = "classification")
# Bundle preprocessing and model into one workflow and fit it on the training
# set.
wf <- workflow() %>%
  add_recipe(rec) %>%
  add_model(model)
model_fit <- fit(wf, data = train_data)
# Predicted class for every held-out bill.
test_data$prediction <- predict(model_fit, new_data = test_data)[[".pred_class"]]
# Accuracy, precision, recall and F-measure, scoring the second factor level
# ("1") as the event of interest.
scorer <- metric_set(
  yardstick::accuracy,
  yardstick::precision,
  yardstick::recall,
  yardstick::f_meas
)
scorer(test_data, truth = env, estimate = prediction, event_level = "second")
## Warning: While computing binary `precision()`, no predicted events were detected (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 8 true event(s) actually occured for the problematic event level, '1'.
## While computing binary `precision()`, no predicted events were detected (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 8 true event(s) actually occured for the problematic event level, '1'.
## # A tibble: 4 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.957
## 2 precision binary NA
## 3 recall binary 0
## 4 f_meas binary NA